For the IWPR Q1 Update (Jan 2025)

Query and load IM3 data

Run this query_im3_scen("energy") only once to query from remote IM3 databases. Once a .dat file is created, we can load the existing project data by loadProject(proj = "im3scen_energy.dat").

# query the data
# im3_energy <- query_im3_scen("energy")
# load the data
im3_energy <- loadProject(proj = paste0("../", data_dir, "im3scen_energy.dat"))
# scenarios and queries 
listScenarios(im3_energy)
[1] "rcp45cooler_ssp3" "rcp45cooler_ssp5" "rcp45hotter_ssp3" "rcp45hotter_ssp5" "rcp85cooler_ssp3" "rcp85cooler_ssp5" "rcp85hotter_ssp3"
[8] "rcp85hotter_ssp5"
listQueries(im3_energy)
[1] "USA inputs by tech"                 "USA outputs by tech"                "inputs by subsector (non-electric)"
[4] "elec gen by subsector"              "USA regional natural gas outputs"   "elec energy input by subsector"    
# mappings 
source_mapping_e <- read_csv(paste0("../", data_dir, "mappings/source_mapping_e.csv"))
Rows: 88 Columns: 2-- Column specification -----------------------------------------------------------------------------------------------------------------------------
Delimiter: ","
chr (2): input, Source
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
target_mapping_e <- read_csv(paste0("../", data_dir, "mappings/target_mapping_e.csv"))
Rows: 104 Columns: 2-- Column specification -----------------------------------------------------------------------------------------------------------------------------
Delimiter: ","
chr (2): sector, Target
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
node_mapping_e <- read_csv(paste0("../", data_dir, "mappings/node_mapping_e.csv")) 
Rows: 19 Columns: 5-- Column specification -----------------------------------------------------------------------------------------------------------------------------
Delimiter: ","
chr (4): label, stage, hex, color_name
dbl (1): node
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.

Energy Sankey

# get queries 
inputsByTechUSA <- getQuery(im3_energy, "USA inputs by tech") 
outputsByTechUSA <- getQuery(im3_energy, "USA outputs by tech")

inputBySubsectorNonElec <- getQuery(im3_energy, 'inputs by subsector (non-electric)') %>% filter_CONUSregions() 
elecEnergyInputBySubsector <- getQuery(im3_energy, 'elec energy input by subsector') %>% filter(Units == "EJ") %>% filter_CONUSregions() # in case no filtering of ELEC_RPS credits
elecGenBySubsector <- getQuery(im3_energy, 'elec gen by subsector') %>% filter(Units == "EJ") %>% filter_CONUSregions() # in case no filtering of ELEC_RPS credits
natGasOutputs <- getQuery(im3_energy, 'USA regional natural gas outputs')
datatables_energy <- list(
  "inputsByTechUSA" = inputsByTechUSA,
  "outputsByTechUSA" = outputsByTechUSA,
  "inputBySubsectorNonElec" = inputBySubsectorNonElec,
  "elecEnergyInputBySubsector" = elecEnergyInputBySubsector,
  "elecGenBySubsector" = elecGenBySubsector,
  "natGasOutputs" = natGasOutputs
)

# print column names of each datatable
lapply(datatables_energy, function(x) colnames(x))
$inputsByTechUSA
[1] "Units"      "scenario"   "region"     "sector"     "subsector"  "technology" "input"      "year"       "value"     

$outputsByTechUSA
[1] "Units"      "scenario"   "region"     "sector"     "subsector"  "technology" "output"     "year"       "value"     

$inputBySubsectorNonElec
[1] "Units"     "scenario"  "region"    "sector"    "subsector" "input"     "year"      "value"    

$elecEnergyInputBySubsector
[1] "Units"     "scenario"  "region"    "sector"    "subsector" "input"     "year"      "value"    

$elecGenBySubsector
[1] "Units"     "scenario"  "region"    "subsector" "year"      "value"    

$natGasOutputs
[1] "Units"      "scenario"   "region"     "sector"     "technology" "output"     "year"       "value"     
# print the first few rows of each datatable
lapply(datatables_energy, function(x) (x))
$inputsByTechUSA

$outputsByTechUSA

$inputBySubsectorNonElec

$elecEnergyInputBySubsector

$elecGenBySubsector

$natGasOutputs
NA

Let’s process each piece to prepare the format of: scenario, source, target, year, value. Scenario and year could be filtered for each Sankey.

Non-electricity

unique((inputBySubsectorNonElec %>% remove_month_day_night_superpeak("sector"))$sector)
  [1] "biomass liquids"           "cement"                    "coal to liquids"           "comm cooking"              "comm cooling"             
  [6] "comm heating"              "comm hot water"            "comm lighting"             "comm non-building"         "comm office"              
 [11] "comm other"                "comm refrigeration"        "comm ventilation"          "delivered biomass"         "elect_td_ind"             
 [16] "elect_td_trn"              "gas to liquids"            "industrial energy use"     "industrial feedstocks"     "industry"                 
 [21] "oil refining"              "process heat cement"       "regional biomass"          "regional biomassOil"       "regional corn for ethanol"
 [26] "resid clothes dryers"      "resid clothes washers"     "resid computers"           "resid cooking"             "resid cooling"            
 [31] "resid dishwashers"         "resid freezers"            "resid furnace fans"        "resid heating"             "resid hot water"          
 [36] "resid lighting"            "resid other"               "resid refrigerators"       "resid televisions"         "trn_aviation_intl"        
 [41] "trn_freight"               "trn_freight_road"          "trn_pass"                  "trn_pass_road"             "trn_pass_road_LDV"        
 [46] "trn_pass_road_LDV_4W"      "trn_shipping_intl"         "N fertilizer"              "carbon-storage"            "municipal water"          
 [51] "water_td_an_C"             "water_td_an_W"             "water_td_dom_C"            "water_td_dom_W"            "water_td_elec_C"          
 [56] "water_td_elec_W"           "water_td_ind_C"            "water_td_ind_W"            "water_td_irr_TennR_C"      "water_td_irr_TennR_W"     
 [61] "water_td_irr_UsaCstSE_C"   "water_td_irr_UsaCstSE_W"   "water_td_pri_C"            "water_td_pri_W"            "water_td_irr_ArkWhtRedR_C"
 [66] "water_td_irr_ArkWhtRedR_W" "water_td_irr_MissppRS_C"   "water_td_irr_MissppRS_W"   "water_td_irr_MexCstNW_C"   "water_td_irr_MexCstNW_W"  
 [71] "water_td_irr_UsaColoRN_C"  "water_td_irr_UsaColoRN_W"  "water_td_irr_UsaColoRS_C"  "water_td_irr_UsaColoRS_W"  "water_td_irr_California_C"
 [76] "water_td_irr_California_W" "water_td_irr_GreatBasin_C" "water_td_irr_GreatBasin_W" "water_td_irr_UsaPacNW_C"   "water_td_irr_UsaPacNW_W"  
 [81] "water_td_irr_MissouriR_C"  "water_td_irr_MissouriR_W"  "water_td_irr_RioGrande_C"  "water_td_irr_RioGrande_W"  "water_td_irr_UsaCstE_C"   
 [86] "water_td_irr_UsaCstE_W"    "water_td_irr_UsaCstNE_C"   "water_td_irr_UsaCstNE_W"   "water_td_irr_Caribbean_C"  "water_td_irr_Caribbean_W" 
 [91] "water_td_irr_MissppRN_C"   "water_td_irr_MissppRN_W"   "water_td_irr_GreatLakes_C" "water_td_irr_GreatLakes_W" "water_td_irr_OhioR_C"     
 [96] "water_td_irr_OhioR_W"      "water_td_irr_TexasCst_C"   "water_td_irr_TexasCst_W"   "water_td_irr_NelsonR_C"    "water_td_irr_NelsonR_W"   
[101] "water_td_irr_FraserR_C"    "water_td_irr_FraserR_W"   
unique(inputBySubsectorNonElec$subsector)
 [1] "biomass liquids"           "cement"                    "coal to liquids"           "electricity"               "gas"                      
 [6] "biomass"                   "coal"                      "refined liquids"           "delivered biomass"         "elect_td_ind"             
[11] "elect_td_trn"              "gas to liquids"            "hydrogen"                  "industry"                  "oil refining"             
[16] "regional biomass"          "regional biomassOil"       "regional corn for ethanol" "International Aviation"    "Domestic Ship"            
[21] "Freight Rail"              "Heavy truck"               "Light truck"               "Medium truck"              "Cycle"                    
[26] "Domestic Aviation"         "HSR"                       "Passenger Rail"            "Walk"                      "Bus"                      
[31] "2W and 3W"                 "Car"                       "Large Car and Truck"       "International Ship"        "offshore carbon-storage"  
[36] "onshore carbon-storage"    "municipal water"           "South Atlantic Gulf"       "Tennessee River"           "Arkansas White Red"       
[41] "Lower Mississippi River"   "Lower Colorado River"      "Mexico-Northwest Coast"    "Upper Colorado River"      "California River"         
[46] "Great"                     "Pacific Northwest"         "Missouri River"            "Rio Grande River"          "Mid Atlantic"             
[51] "New England"               "Caribbean"                 "Upper Mississippi"         "Ohio River"                "Great Lakes"              
[56] "Texas Gulf Coast"          "Saskatchewan-Nelson"       "Fraser"                    "Pacific and Arctic Coast"  "road"                     
[61] "LDV"                       "4W"                       
unique((inputBySubsectorNonElec %>% remove_month_day_night_superpeak("input"))$input)
 [1] "elect_td_ind"                               "regional biomass"                           "regional biomassOil"                       
 [4] "regional corn for ethanol"                  "wholesale gas"                              "process heat cement"                       
 [7] "regional coal"                              "elect_td_bld"                               "delivered gas"                             
[10] "electricity domestic supply"                "delivered biomass"                          "delivered coal"                            
[13] "refined liquids enduse"                     "regional natural gas"                       "H2 enduse"                                 
[16] "refined liquids industrial"                 "oil-credits"                                "industrial energy use"                     
[19] "industrial feedstocks"                      "industrial processes"                       "regional oil"                              
[22] "regional oilcrop"                           "regional corn"                              "renewable"                                 
[25] "elect_td_trn"                               "limestone"                                  "offshore carbon-storage"                   
[28] "onshore carbon-storage"                     "water_td_ind_C"                             "water_td_ind_W"                            
[31] "water_td_dom_C"                             "water_td_dom_W"                             "South Atlantic Gulf_water consumption"     
[34] "Tennessee River_water consumption"          "South Atlantic Gulf_water withdrawals"      "Tennessee River_water withdrawals"         
[37] "Arkansas White Red_water consumption"       "Lower Mississippi River_water consumption"  "Arkansas White Red_water withdrawals"      
[40] "Lower Mississippi River_water withdrawals"  "Lower Colorado River_water consumption"     "Mexico-Northwest Coast_water consumption"  
[43] "Upper Colorado River_water consumption"     "Lower Colorado River_water withdrawals"     "Mexico-Northwest Coast_water withdrawals"  
[46] "Upper Colorado River_water withdrawals"     "California River_water consumption"         "Great_water consumption"                   
[49] "Pacific Northwest_water consumption"        "California River_water withdrawals"         "desalination"                              
[52] "Great_water withdrawals"                    "Pacific Northwest_water withdrawals"        "Missouri River_water consumption"          
[55] "Rio Grande River_water consumption"         "Missouri River_water withdrawals"           "Rio Grande River_water withdrawals"        
[58] "Mid Atlantic_water consumption"             "New England_water consumption"              "Mid Atlantic_water withdrawals"            
[61] "New England_water withdrawals"              "Caribbean_water consumption"                "Caribbean_water withdrawals"               
[64] "Upper Mississippi_water consumption"        "Upper Mississippi_water withdrawals"        "Ohio River_water consumption"              
[67] "Ohio River_water withdrawals"               "Great Lakes_water consumption"              "Great Lakes_water withdrawals"             
[70] "Texas Gulf Coast_water consumption"         "Texas Gulf Coast_water withdrawals"         "Saskatchewan-Nelson_water consumption"     
[73] "Saskatchewan-Nelson_water withdrawals"      "Fraser_water consumption"                   "Pacific and Arctic Coast_water consumption"
[76] "Fraser_water withdrawals"                   "Pacific and Arctic Coast_water withdrawals" "trn_pass_road"                             
[79] "trn_pass_road_LDV"                          "trn_pass_road_LDV_4W"                       "trn_freight_road"                          
# map non electricity energy flows to major aggregated categories based on the mapping file 

inputs_by_subsector_nonelec <- inputBySubsectorNonElec %>% 
  filter(Units == 'EJ') %>% # only take energy flows
  filter(!input %in% c('regional corn', 'regional soybean')) %>% # remove crop inputs 
  # aggregate all monthly_day combinations to one category e.g., electricity domestic supply_Nov_day to electricity domestic supply
  remove_month_day_night_superpeak("sector") %>% remove_month_day_night_superpeak("input") %>%
  rbind(inputsByTechUSA %>% filter(str_detect(sector, "H2 ")) %>% select(-technology)) %>% # add H3 flows from the USA region level as IM3 doesn't model H2 at state level
  left_join(source_mapping_e, by = 'input') %>%
  left_join(target_mapping_e, by = 'sector')

# Note there are NAs in the output due to missing mappings or sectors that are
# not supposed to be targets and inputs that are not supposed# to be sources
# get hydrogen flows from USA region since IM3 version doesn't model H2 at state level 
inputsByTechUSA %>%# filter all inputs that have "H2 " in it
  filter(str_detect(sector, "H2")) %>%
  select(sector) %>% unique()
NA
NA
# things that were remapped as sources
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Source)))$sector)
 [1] "biomass liquids"         "cement"                  "coal to liquids"         "comm cooking"            "comm cooling"           
 [6] "comm heating"            "comm hot water"          "comm lighting"           "comm non-building"       "comm office"            
[11] "comm other"              "comm refrigeration"      "comm ventilation"        "delivered biomass"       "elect_td_ind"           
[16] "elect_td_trn"            "gas to liquids"          "industrial energy use"   "industrial feedstocks"   "industry"               
[21] "oil refining"            "process heat cement"     "regional biomass"        "resid clothes dryers"    "resid clothes washers"  
[26] "resid computers"         "resid cooking"           "resid cooling"           "resid dishwashers"       "resid freezers"         
[31] "resid furnace fans"      "resid heating"           "resid hot water"         "resid lighting"          "resid other"            
[36] "resid refrigerators"     "resid televisions"       "trn_aviation_intl"       "trn_freight"             "trn_freight_road"       
[41] "trn_pass"                "trn_pass_road"           "trn_pass_road_LDV"       "trn_pass_road_LDV_4W"    "trn_shipping_intl"      
[46] "N fertilizer"            "H2 central production"   "H2 distribution"         "H2 enduse"               "H2 forecourt production"
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Source)))$subsector)
 [1] "biomass liquids"         "cement"                  "coal to liquids"         "electricity"             "gas"                    
 [6] "biomass"                 "coal"                    "refined liquids"         "delivered biomass"       "elect_td_ind"           
[11] "elect_td_trn"            "gas to liquids"          "hydrogen"                "industry"                "oil refining"           
[16] "regional biomass"        "International Aviation"  "Domestic Ship"           "Freight Rail"            "Heavy truck"            
[21] "Light truck"             "Medium truck"            "Domestic Aviation"       "HSR"                     "Passenger Rail"         
[26] "Bus"                     "2W and 3W"               "Car"                     "Large Car and Truck"     "International Ship"     
[31] "nuclear"                 "H2 distribution"         "H2 forecourt production"
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Source)))$input) # look at this
 [1] "elect_td_ind"                "regional biomass"            "regional biomassOil"         "regional corn for ethanol"  
 [5] "wholesale gas"               "process heat cement"         "regional coal"               "elect_td_bld"               
 [9] "delivered gas"               "electricity domestic supply" "delivered biomass"           "delivered coal"             
[13] "refined liquids enduse"      "regional natural gas"        "H2 enduse"                   "refined liquids industrial" 
[17] "industrial energy use"       "industrial feedstocks"       "industrial processes"        "regional oil"               
[21] "elect_td_trn"                "nuclearFuelGenIII"           "H2 central production"       "H2 distribution"            
[25] "H2 forecourt production"    
# things there were NOT mapped as sources 
unique((inputs_by_subsector_nonelec %>% filter(is.na(Source)))$sector)
[1] "industrial feedstocks" "regional biomassOil"   "trn_pass"             
unique((inputs_by_subsector_nonelec %>% filter(is.na(Source)))$subsector)
[1] "refined liquids"     "regional biomassOil" "Cycle"               "Walk"               
unique((inputs_by_subsector_nonelec %>% filter(is.na(Source)))$input) # look at this 
[1] "oil-credits"      "regional oilcrop" "renewable"       
# things that were remapped as targets
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Target)))$sector) # look at this
 [1] "biomass liquids"         "cement"                  "coal to liquids"         "comm cooking"            "comm cooling"           
 [6] "comm heating"            "comm hot water"          "comm lighting"           "comm non-building"       "comm office"            
[11] "comm other"              "comm refrigeration"      "comm ventilation"        "delivered biomass"       "elect_td_ind"           
[16] "elect_td_trn"            "gas to liquids"          "industrial energy use"   "industrial feedstocks"   "industry"               
[21] "oil refining"            "process heat cement"     "regional biomass"        "regional biomassOil"     "resid clothes dryers"   
[26] "resid clothes washers"   "resid computers"         "resid cooking"           "resid cooling"           "resid dishwashers"      
[31] "resid freezers"          "resid furnace fans"      "resid heating"           "resid hot water"         "resid lighting"         
[36] "resid other"             "resid refrigerators"     "resid televisions"       "trn_aviation_intl"       "trn_freight"            
[41] "trn_freight_road"        "trn_pass"                "trn_pass_road"           "trn_pass_road_LDV"       "trn_pass_road_LDV_4W"   
[46] "trn_shipping_intl"       "N fertilizer"            "H2 central production"   "H2 distribution"         "H2 enduse"              
[51] "H2 forecourt production"
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Target)))$subsector)
 [1] "biomass liquids"         "cement"                  "coal to liquids"         "electricity"             "gas"                    
 [6] "biomass"                 "coal"                    "refined liquids"         "delivered biomass"       "elect_td_ind"           
[11] "elect_td_trn"            "gas to liquids"          "hydrogen"                "industry"                "oil refining"           
[16] "regional biomass"        "regional biomassOil"     "International Aviation"  "Domestic Ship"           "Freight Rail"           
[21] "Heavy truck"             "Light truck"             "Medium truck"            "Cycle"                   "Domestic Aviation"      
[26] "HSR"                     "Passenger Rail"          "Walk"                    "Bus"                     "2W and 3W"              
[31] "Car"                     "Large Car and Truck"     "International Ship"      "nuclear"                 "H2 distribution"        
[36] "H2 forecourt production"
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Target)))$input)
 [1] "elect_td_ind"                "regional biomass"            "regional biomassOil"         "regional corn for ethanol"  
 [5] "wholesale gas"               "process heat cement"         "regional coal"               "elect_td_bld"               
 [9] "delivered gas"               "electricity domestic supply" "delivered biomass"           "delivered coal"             
[13] "refined liquids enduse"      "regional natural gas"        "H2 enduse"                   "refined liquids industrial" 
[17] "oil-credits"                 "industrial energy use"       "industrial feedstocks"       "industrial processes"       
[21] "regional oil"                "regional oilcrop"            "renewable"                   "elect_td_trn"               
[25] "nuclearFuelGenIII"           "H2 central production"       "H2 distribution"             "H2 forecourt production"    
# things that were NOT remapped as targets
unique((inputs_by_subsector_nonelec %>% filter(is.na(Target)))$sector) # look at this
character(0)
unique((inputs_by_subsector_nonelec %>% filter(is.na(Target)))$subsector)
character(0)
unique((inputs_by_subsector_nonelec %>% filter(is.na(Target)))$input)
character(0)
# check for unmatched sources
inputs_by_subsector_nonelec_unmatched_source <- inputs_by_subsector_nonelec %>% 
  filter(is.na(Source)) %>% 
  select(scenario, sector, subsector, input, Source, Target) %>% 
  unique()

unique(inputs_by_subsector_nonelec_unmatched_source$input)
[1] "oil-credits"      "regional oilcrop" "renewable"       
unmatched_sources <- c("oil-credits", "renewable", "regional oilcrop", "process heat cement", "process heat dac")

if(! all(inputs_by_subsector_nonelec_unmatched_source$input %in% unmatched_sources )){
  unmatched <- setdiff(inputs_by_subsector_nonelec_unmatched_source$input, unmatched_sources)
  stop(paste0("Unmatched Sources in inputs by subsector nonelec. Check Source mapping file against gcam data: ", paste(unmatched, collapse = ' - ')))
}
# check for unmatched targets
inputs_by_subsector_nonelec_unmatched_target <- inputs_by_subsector_nonelec %>% 
  filter(is.na(Target)) %>% 
  select(scenario, sector, subsector, input, Source, Target) %>% 
  unique

unique(inputs_by_subsector_nonelec_unmatched_target$sector)
character(0)
unmatched_targets <- c("H2 central production", 
                       "H2 liquid truck",
                       "H2 pipeline",
                       "H2 wholesale delivery" #all intermediate hydrogen markets that are double counting - only want H2 industrial and H2 MHDV
                       )
if(! all(inputs_by_subsector_nonelec_unmatched_target$sector %in% unmatched_targets)){
  unmatched <- setdiff(inputs_by_subsector_nonelec_unmatched_target$sector, unmatched_targets)
  stop(paste0("Unmatched Sources in inputs by subsector nonelec. Check Source mapping file against gcam data: ", paste(unmatched, collapse = ' - ')))
}

Get other flows such as gas processing and backup electricity

gas_processing_flows <- inputsByTechUSA %>%
  filter(sector == "gas processing") %>%
  left_join(source_mapping_e, by = "input") %>%
  left_join(target_mapping_e, by = "sector") %>%
  group_by(scenario, Units, year, Source, Target) %>%
  summarize(value = sum(value)) %>%
  ungroup()
`summarise()` has grouped output by 'scenario', 'Units', 'year', 'Source'. You can override using the `.groups` argument.
backup <- inputsByTechUSA %>%
  filter(sector %in% c("backup_electricity", "csp_backup")) %>%
  left_join(source_mapping_e, by = "input") %>%
  left_join(target_mapping_e, by = "sector") %>%
  group_by(scenario, Units, year, Source, Target) %>%
  summarize(value = sum(value)) %>%
  ungroup()
`summarise()` has grouped output by 'scenario', 'Units', 'year', 'Source'. You can override using the `.groups` argument.

Electricity

elec_energy_by_subsector <- elecEnergyInputBySubsector %>% 
  filter(Units == 'EJ') %>%
  filter(!input %in% c('backup_electricity', 'csp_backup'),
         !subsector %in% c("nuclear", "geothermal")) %>% #don't want to double count electricity from backup, and nuclear and geothermal are reported from output
  left_join(target_mapping_e, by = 'sector') %>% 
  left_join(source_mapping_e, by = 'input') 

#hydropower is only available as an output. In the "direct equivalent" reporting convention used here, input = output
hydro_power <- elecGenBySubsector %>%
  filter(subsector == 'hydro') %>%
  mutate(Source = 'Hydropower',
         Target = 'Electricity')

# nuclear's reported thermal inputs assume a 3:1 conversion, so for "direct equivalent" reporting we use the output
nuclear <- elecGenBySubsector %>%
  filter(subsector == 'nuclear') %>%
  mutate(Source = 'Nuclear',
         Target = 'Electricity') 

# geothermal's reported thermal inputs assume a 10:1 conversion, so for "direct equivalent" reporting we use the output
geothermal <- elecGenBySubsector %>%
  filter(subsector == 'geothermal') %>%
  mutate(Source = 'Geothermal',
         Target = 'Electricity')
# put everything together
all_energy <- inputs_by_subsector_nonelec %>% 
  bind_rows(gas_processing_flows) %>% 
  bind_rows(backup) %>%
  bind_rows(elec_energy_by_subsector) %>% 
  bind_rows(hydro_power) %>% 
  bind_rows(nuclear) %>%
  bind_rows(geothermal)

Source_Target_all <- all_energy %>% 
  group_by(scenario, Units, Source, Target, year) %>%
  summarise(value = sum(value))  %>% 
  filter( Source != Target) %>% 
  filter( Target != 'Biomass') %>% 
  ungroup() 
`summarise()` has grouped output by 'scenario', 'Units', 'Source', 'Target'. You can override using the `.groups` argument.
datatable(Source_Target_all, filter = 'top', rownames = FALSE)
# calculate losses 
# take the different of the sum of sources of a node and the sum of targets of a node and assign it to Losses target node. The Source would be the node it self. 

losses <- Source_Target_all %>%
  # cather data to have one column for "direction" (Source/Target) and one for "node"
  pivot_longer(cols = c(Source, Target), 
               names_to = "direction", 
               values_to = "node") %>%
  # only calculate for mid-tier/transformation flows
  left_join(node_mapping_e %>% filter(stage == "mid") %>% select("node" = "label", stage), by = "node" ) %>%
  filter(!is.na(stage)) %>%
  group_by(scenario, year, Units, node, direction) %>%
  summarize(total_value = sum(value), .groups = "drop") %>%
  pivot_wider(names_from = direction, values_from = total_value, values_fill = 0) %>%
  mutate(losses = Target - Source) %>%
  # filter(losses > 0) %>%
  # create the "Losses" rows with losses as target and the node as Source
  mutate(Source = node, Target = "Losses", value = losses) %>%
  select(scenario, Units, Source, Target, year, value) 

# add the losses back to the original dataset and complete it 
Source_Target_all_losses <- Source_Target_all %>% bind_rows(losses) %>% select(-Units) %>% 
  complete(scenario, year, nesting(Source, Target), fill = list(value = 0)) %>% mutate(units = "EJ")
# energy losses plot 
if (F) {
  losses %>% 
  ggplot(aes(x = year, y = value, color = Source, linetype = Source)) +
  geom_line(size = 1) +
  scale_color_manual(values = node_mapping_e %>% filter(stage == "mid") %>% pull(hex)) +
  facet_wrap(~scenario, nrow = 2) +
  labs(title = "Energy Efficiency Losses: IM3 scenarios", x = "Year", y = "Losses (EJ)") +
  theme_bw() 
}

Plotting

scenario_name <- "rcp45cooler_ssp3"
plot_scenario_name <- 'RCP 4.5 Cooler SSP3'

select_year <- '2050'
gcam_data_unit <- 'EJ'

# sankey formatting
link_alpha <- .5

# source/target mapping

node_mapping_in <- node_mapping_e

# GCAM data
gcam_data <- Source_Target_all_losses %>% 
  filter(scenario == scenario_name) %>% filter( year == select_year) %>% select(-scenario)

all_links <- c(gcam_data$Source, gcam_data$Target) %>% unique()

node_mapping_e <- node_mapping_in %>% filter(label %in% all_links)

node_mapping_e$node <- 0:(nrow(node_mapping_e)-1)

# process node data
links_data <- gcam_data %>% 
  # filter(Source %in% c("Hydropower", "Solar")) %>% 
  select(Source, Target, value) %>% 
  # mutate(Target = ifelse(str_detect(Target, 'Ind'), 'Industry', Target)) %>% 
  group_by(Source, Target) %>% 
  summarize(value = sum(value)) %>% 
  ungroup() %>% 
  rename(Source_label = Source,
         Target_label = Target) %>% 
  left_join(node_mapping_e %>% select(label, node), by = c('Source_label' = 'label')) %>% 
  rename(Source_node = node) %>% 
  left_join(node_mapping_e %>% select(label, node), by = c('Target_label' = 'label')) %>% 
  rename(Target_node = node) %>% 
  left_join(node_mapping_e %>% select(label, stage, hex, color_name), by = c('Source_label' = 'label')) %>% 
  mutate(rgb = apply(FUN = paste, MARGIN = 2, X = col2rgb(hex), collapse = ',')) %>% 
  mutate(rgba = paste0('rgba(', rgb, ', ', link_alpha,')')) %>% 
  mutate(link_label = paste(Source_label, round(value, digits = 1),'EJ')) %>% 
  filter(value>0) %>% 
  arrange(Source_node)
`summarise()` has grouped output by 'Source'. You can override using the `.groups` argument.
datatable(links_data, filter = 'top', rownames = FALSE, options = list(pageLength = 10, scrollX = TRUE))
# process node percent labels

# source
source_sum <- links_data %>% 
  select(Source_label, value) %>% 
  left_join(node_mapping_e %>% select(label, stage), by = c('Source_label' = 'label')) %>% 
  rename(label=Source_label) %>% 
  filter(tolower(stage) == 'source') %>% 
  group_by(label, stage) %>% 
  summarize(node_sum = sum(value))
`summarise()` has grouped output by 'label'. You can override using the `.groups` argument.
source_total <- source_sum %>% 
  pull(node_sum) %>% sum

source_percent <- source_sum %>% 
  mutate(percent = node_sum/source_total*100) %>% 
  left_join(node_mapping_e) %>% 
  arrange(node) %>% 
  mutate(x = .01) %>% 
  mutate(csum_norm = source_total)
Joining with `by = join_by(label, stage)`
source_percent$csum <- cumsum(source_percent$node_sum)
source_percent$start <- lag(source_percent$csum)

# target
target_sum <- links_data %>% 
  select(Target_label, value) %>% 
  left_join(node_mapping_e %>% select(label, stage), by = c('Target_label' = 'label')) %>% 
  rename(label=Target_label) %>% 
  filter(stage == 'target') %>% 
  group_by(label, stage) %>% 
  summarize(node_sum = sum(value))
`summarise()` has grouped output by 'label'. You can override using the `.groups` argument.
target_total <- target_sum %>% 
  pull(node_sum) %>% sum

target_percent <- target_sum %>% 
  mutate(percent = node_sum/target_total*100) %>% 
  left_join(node_mapping_e) %>% 
  arrange(node) %>% 
  mutate(x = .95) %>% 
  mutate(csum_norm = target_total)
Joining with `by = join_by(label, stage)`
target_percent$csum <- cumsum(target_percent$node_sum)
target_percent$start <- lag(target_percent$csum)

# Intermediate Carriers Flows in
intermediate_nodes <- node_mapping_e %>% filter(stage == 'mid') %>% pull(label)
 intermediate_flows_in_total <- links_data %>%
   filter(Target_label %in% intermediate_nodes) %>% 
   group_by(Target_label) %>% 
   summarize(node_sum = sum(value))
 
intermediate_percent <- intermediate_flows_in_total %>% 
 rename(label = Target_label) %>% 
 mutate(stage = 'mid') %>% 
 mutate(percent =node_sum/source_total*100) %>% 
 left_join(node_mapping_e)
Joining with `by = join_by(label, stage)`
intermediate_total <- intermediate_percent %>% pull(node_sum) %>% sum

intermediate_flows_out_total <- links_data %>%
 filter(Source_label %in% intermediate_nodes) %>% 
 group_by(Source_label) %>% 
 summarize(value = sum(value))
  
# process node locations 

# final node info
nodes_data <- bind_rows(source_percent, intermediate_percent, target_percent) %>%
  arrange(node) %>%
  replace_na(list(start = 0)) %>% 
  mutate(mid_point = (start+csum)/2) %>% 
  mutate(y = mid_point/csum_norm) %>% 
  mutate(y = ifelse(label == 'Gas', 0.5,
                    ifelse(label == 'Liquid Fuels', 0.2,
                    ifelse(label == 'Electricity', 0.6,
                    ifelse(label == 'Hydrogen',0.9,y))))) %>% 
  mutate(x = ifelse(label == 'Gas', 0.25,
                    ifelse(label == 'Liquid Fuels', 0.4,
                           ifelse(label == 'Electricity', 0.6,
                                  ifelse(label == 'Hydrogen', 0.7,x))))) %>%
  mutate(node_label = ifelse(is.na(node_sum), label, 
                               paste0(label, ' ',round(node_sum, digits = 1) , gcam_data_unit, 
                                      ' ', round(percent, digits = 1),'%'))) 
  

# Check that Source and Targets in Links are in the node mapping

if( any(is.na(links_data$Source_node)) ) stop("Check Source number mapping - NA's")
if( any(is.na(links_data$Target_node)) ) stop("Check Target number mapping - NA's")
  
datatable(nodes_data, filter = 'top', rownames = FALSE, options = list(pageLength = 20, scrollX = TRUE))
# save files for Kendall
names(Source_Target_all_losses) <- tolower(names(Source_Target_all_losses)) 
write_csv(Source_Target_all_losses %>% select(scenario, source, target, year, value, units), paste0("../", data_dir, 'allenergy_source_target.csv'))
# write_csv(nodes_data, paste0("../", data_dir, 'allenergy_nodes_data.csv'))
# write_csv(links_data, paste0("../", data_dir, 'allenergy_links_data.csv'))
# plot sankey
sankey_figure <- plot_ly( 
      type = "sankey",
      # arrangement = "snap",
      domain = list(x =  c(0,1),y =  c(0,1)),
      orientation = "h",
      valueformat = ".0f",
      valuesuffix = gcam_data_unit,

# Nodes  
      node = list( label = nodes_data %>% pull(node_label),
                   color = nodes_data %>% pull(hex),
                   x = nodes_data %>% pull(x),
                   y = nodes_data %>% pull(y),
                   pad = 3,
                   thickness = 15,
                   line = list(color = "black",width = 0.5)),
  
# Links
      link = list(source = links_data$Source_node,
                  target = links_data$Target_node,
                  value =  links_data$value,
                  color =  links_data$rgba)
) 

# add Formatting
plot_title <- paste0('Energy - ', plot_scenario_name, ' - ',select_year)
sankey_figure <- sankey_figure %>% layout(
  title = plot_title,
  font = list(size = 11),
  xaxis = list(showgrid = F, zeroline = F),
  yaxis = list(showgrid = F, zeroline = F))

sankey_figure
NA
NA
---
title: "Energy Flows from the IM3 GCAM-USA Scenarios"
author: "Hassan Niazi (hassan.niazi@pnnl.gov) | Adapted from the work of Rachel Hoesly"
date: "Last compiled on `r format(Sys.time(), '%d %B, %Y')`"
output:
  html_notebook:
    toc: true
    # toc_float: TRUE
  html_document:
    toc: true
    df_print: paged
---

```{r setup, include=FALSE, warning=FALSE}
# by default collapse/hide the code
# knitr::opts_chunk$set(echo = FALSE)
# set working directory to one folder up
setwd("../")
# getwd()
source("./R/functions.R")
```

### For the IWPR Q1 Update (Jan 2025)

-   Goal: plot an all energy sankey for Q1 update of the EW-Flows project

### Query and load IM3 data

Run this `query_im3_scen("energy")` only once to query from remote IM3 databases. Once a `.dat` file is created, we can load the existing project data by `loadProject(proj = "im3scen_energy.dat")`.

```{r message=FALSE, warning=FALSE}
# query the data
# im3_energy <- query_im3_scen("energy")
```

```{r, warning = FALSE}
# load the data
im3_energy <- loadProject(proj = paste0("../", data_dir, "im3scen_energy.dat"))
```

```{r}
# scenarios and queries 
listScenarios(im3_energy)
listQueries(im3_energy)
```


```{r, warning = FALSE}
# mappings 
source_mapping_e <- read_csv(paste0("../", data_dir, "mappings/source_mapping_e.csv"))
target_mapping_e <- read_csv(paste0("../", data_dir, "mappings/target_mapping_e.csv"))
node_mapping_e <- read_csv(paste0("../", data_dir, "mappings/node_mapping_e.csv")) 
```


### Energy Sankey

```{r, warning = FALSE}
# get queries 
inputsByTechUSA <- getQuery(im3_energy, "USA inputs by tech") 
outputsByTechUSA <- getQuery(im3_energy, "USA outputs by tech")

inputBySubsectorNonElec <- getQuery(im3_energy, 'inputs by subsector (non-electric)') %>% filter_CONUSregions() 
elecEnergyInputBySubsector <- getQuery(im3_energy, 'elec energy input by subsector') %>% filter(Units == "EJ") %>% filter_CONUSregions() # in case no filtering of ELEC_RPS credits
elecGenBySubsector <- getQuery(im3_energy, 'elec gen by subsector') %>% filter(Units == "EJ") %>% filter_CONUSregions() # in case no filtering of ELEC_RPS credits
natGasOutputs <- getQuery(im3_energy, 'USA regional natural gas outputs')

```

```{r, messsage = T, warning = FALSE}
datatables_energy <- list(
  "inputsByTechUSA" = inputsByTechUSA,
  "outputsByTechUSA" = outputsByTechUSA,
  "inputBySubsectorNonElec" = inputBySubsectorNonElec,
  "elecEnergyInputBySubsector" = elecEnergyInputBySubsector,
  "elecGenBySubsector" = elecGenBySubsector,
  "natGasOutputs" = natGasOutputs
)

# print column names of each datatable
lapply(datatables_energy, function(x) colnames(x))

# print the first few rows of each datatable
lapply(datatables_energy, function(x) (x))
```

Let's process each piece to prepare the format of: scenario, source, target, year, value. Scenario and year could be filtered for each Sankey.

#### Non-electricity 


```{r fig.width=8, warning=FALSE}
unique((inputBySubsectorNonElec %>% remove_month_day_night_superpeak("sector"))$sector)
unique(inputBySubsectorNonElec$subsector)
unique((inputBySubsectorNonElec %>% remove_month_day_night_superpeak("input"))$input)
```


```{r fig.width=8, warning=FALSE}
# map non electricity energy flows to major aggregated categories based on the mapping file 

inputs_by_subsector_nonelec <- inputBySubsectorNonElec %>% 
  filter(Units == 'EJ') %>% # only take energy flows
  filter(!input %in% c('regional corn', 'regional soybean')) %>% # remove crop inputs 
  # aggregate all monthly_day combinations to one category e.g., electricity domestic supply_Nov_day to electricity domestic supply
  remove_month_day_night_superpeak("sector") %>% remove_month_day_night_superpeak("input") %>%
  rbind(inputsByTechUSA %>% filter(str_detect(sector, "H2 ")) %>% select(-technology)) %>% # add H3 flows from the USA region level as IM3 doesn't model H2 at state level
  left_join(source_mapping_e, by = 'input') %>%
  left_join(target_mapping_e, by = 'sector')

# Note there are NAs in the output due to missing mappings or sectors that are
# not supposed to be targets and inputs that are not supposed# to be sources
```


```{r fig.width=8, warning=FALSE}
# get hydrogen flows from USA region since IM3 version doesn't model H2 at state level 
inputsByTechUSA %>%# filter all inputs that have "H2 " in it
  filter(str_detect(sector, "H2")) %>%
  select(sector) %>% unique()


```


```{r fig.width=8, warning=FALSE}
# things that were remapped as sources
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Source)))$sector)
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Source)))$subsector)
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Source)))$input) # look at this
```

```{r fig.width=8, warning=FALSE}
# things there were NOT mapped as sources 
unique((inputs_by_subsector_nonelec %>% filter(is.na(Source)))$sector)
unique((inputs_by_subsector_nonelec %>% filter(is.na(Source)))$subsector)
unique((inputs_by_subsector_nonelec %>% filter(is.na(Source)))$input) # look at this 
```


```{r fig.width=8, warning=FALSE}
# things that were remapped as targets
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Target)))$sector) # look at this
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Target)))$subsector)
unique((inputs_by_subsector_nonelec %>% filter(!is.na(Target)))$input)
```


```{r fig.width=8, warning=FALSE}
# things that were NOT remapped as targets
unique((inputs_by_subsector_nonelec %>% filter(is.na(Target)))$sector) # look at this
unique((inputs_by_subsector_nonelec %>% filter(is.na(Target)))$subsector)
unique((inputs_by_subsector_nonelec %>% filter(is.na(Target)))$input)
```


```{r fig.width=8, warning=FALSE}
# check for unmatched sources
inputs_by_subsector_nonelec_unmatched_source <- inputs_by_subsector_nonelec %>% 
  filter(is.na(Source)) %>% 
  select(scenario, sector, subsector, input, Source, Target) %>% 
  unique()

unique(inputs_by_subsector_nonelec_unmatched_source$input)

unmatched_sources <- c("oil-credits", "renewable", "regional oilcrop", "process heat cement", "process heat dac")

if(! all(inputs_by_subsector_nonelec_unmatched_source$input %in% unmatched_sources )){
  unmatched <- setdiff(inputs_by_subsector_nonelec_unmatched_source$input, unmatched_sources)
  stop(paste0("Unmatched Sources in inputs by subsector nonelec. Check Source mapping file against gcam data: ", paste(unmatched, collapse = ' - ')))
}
```


```{r fig.width=8, warning=FALSE}
# check for unmatched targets
inputs_by_subsector_nonelec_unmatched_target <- inputs_by_subsector_nonelec %>% 
  filter(is.na(Target)) %>% 
  select(scenario, sector, subsector, input, Source, Target) %>% 
  unique

unique(inputs_by_subsector_nonelec_unmatched_target$sector)

unmatched_targets <- c("H2 central production", 
                       "H2 liquid truck",
                       "H2 pipeline",
                       "H2 wholesale delivery" #all intermediate hydrogen markets that are double counting - only want H2 industrial and H2 MHDV
                       )
if(! all(inputs_by_subsector_nonelec_unmatched_target$sector %in% unmatched_targets)){
  unmatched <- setdiff(inputs_by_subsector_nonelec_unmatched_target$sector, unmatched_targets)
  stop(paste0("Unmatched Sources in inputs by subsector nonelec. Check Source mapping file against gcam data: ", paste(unmatched, collapse = ' - ')))
}


```

Get other flows such as gas processing and backup electricity

```{r fig.width=8, warning=FALSE}
gas_processing_flows <- inputsByTechUSA %>%
  filter(sector == "gas processing") %>%
  left_join(source_mapping_e, by = "input") %>%
  left_join(target_mapping_e, by = "sector") %>%
  group_by(scenario, Units, year, Source, Target) %>%
  summarize(value = sum(value)) %>%
  ungroup()

backup <- inputsByTechUSA %>%
  filter(sector %in% c("backup_electricity", "csp_backup")) %>%
  left_join(source_mapping_e, by = "input") %>%
  left_join(target_mapping_e, by = "sector") %>%
  group_by(scenario, Units, year, Source, Target) %>%
  summarize(value = sum(value)) %>%
  ungroup()
```

#### Electricity

```{r fig.width=8, warning=FALSE}
elec_energy_by_subsector <- elecEnergyInputBySubsector %>% 
  filter(Units == 'EJ') %>%
  filter(!input %in% c('backup_electricity', 'csp_backup'),
         !subsector %in% c("nuclear", "geothermal")) %>% #don't want to double count electricity from backup, and nuclear and geothermal are reported from output
  left_join(target_mapping_e, by = 'sector') %>% 
  left_join(source_mapping_e, by = 'input') 

#hydropower is only available as an output. In the "direct equivalent" reporting convention used here, input = output
hydro_power <- elecGenBySubsector %>%
  filter(subsector == 'hydro') %>%
  mutate(Source = 'Hydropower',
         Target = 'Electricity')

# nuclear's reported thermal inputs assume a 3:1 conversion, so for "direct equivalent" reporting we use the output
nuclear <- elecGenBySubsector %>%
  filter(subsector == 'nuclear') %>%
  mutate(Source = 'Nuclear',
         Target = 'Electricity') 

# geothermal's reported thermal inputs assume a 10:1 conversion, so for "direct equivalent" reporting we use the output
geothermal <- elecGenBySubsector %>%
  filter(subsector == 'geothermal') %>%
  mutate(Source = 'Geothermal',
         Target = 'Electricity')
```


```{r fig.width=8, warning=FALSE}
# put everything together
all_energy <- inputs_by_subsector_nonelec %>% 
  bind_rows(gas_processing_flows) %>% 
  bind_rows(backup) %>%
  bind_rows(elec_energy_by_subsector) %>% 
  bind_rows(hydro_power) %>% 
  bind_rows(nuclear) %>%
  bind_rows(geothermal)

Source_Target_all <- all_energy %>% 
  group_by(scenario, Units, Source, Target, year) %>%
  summarise(value = sum(value))  %>% 
  filter( Source != Target) %>% 
  filter( Target != 'Biomass') %>% 
  ungroup() 

datatable(Source_Target_all, filter = 'top', rownames = FALSE)
```


```{r fig.width=8, warning=FALSE}
# calculate losses 
# take the different of the sum of sources of a node and the sum of targets of a node and assign it to Losses target node. The Source would be the node it self. 

losses <- Source_Target_all %>%
  # cather data to have one column for "direction" (Source/Target) and one for "node"
  pivot_longer(cols = c(Source, Target), 
               names_to = "direction", 
               values_to = "node") %>%
  # only calculate for mid-tier/transformation flows
  left_join(node_mapping_e %>% filter(stage == "mid") %>% select("node" = "label", stage), by = "node" ) %>%
  filter(!is.na(stage)) %>%
  group_by(scenario, year, Units, node, direction) %>%
  summarize(total_value = sum(value), .groups = "drop") %>%
  pivot_wider(names_from = direction, values_from = total_value, values_fill = 0) %>%
  mutate(losses = Target - Source) %>%
  # filter(losses > 0) %>%
  # create the "Losses" rows with losses as target and the node as Source
  mutate(Source = node, Target = "Losses", value = losses) %>%
  select(scenario, Units, Source, Target, year, value) 

# add the losses back to the original dataset and complete it 
Source_Target_all_losses <- Source_Target_all %>% bind_rows(losses) %>% select(-Units) %>% 
  complete(scenario, year, nesting(Source, Target), fill = list(value = 0)) %>% mutate(units = "EJ")

```


```{r fig.width=8, warning=FALSE}
# energy losses plot 
if (F) {
  losses %>% 
  ggplot(aes(x = year, y = value, color = Source, linetype = Source)) +
  geom_line(size = 1) +
  scale_color_manual(values = node_mapping_e %>% filter(stage == "mid") %>% pull(hex)) +
  facet_wrap(~scenario, nrow = 2) +
  labs(title = "Energy Efficiency Losses: IM3 scenarios", x = "Year", y = "Losses (EJ)") +
  theme_bw() 
}

```

#### Plotting 

```{r fig.width=8, warning=FALSE}
scenario_name <- "rcp45cooler_ssp3"
plot_scenario_name <- 'RCP 4.5 Cooler SSP3'

select_year <- '2050'
gcam_data_unit <- 'EJ'

# sankey formatting
link_alpha <- .5

# source/target mapping

node_mapping_in <- node_mapping_e

# GCAM data
gcam_data <- Source_Target_all_losses %>% 
  filter(scenario == scenario_name) %>% filter( year == select_year) %>% select(-scenario)

all_links <- c(gcam_data$Source, gcam_data$Target) %>% unique()

node_mapping_e <- node_mapping_in %>% filter(label %in% all_links)

node_mapping_e$node <- 0:(nrow(node_mapping_e)-1)

# process node data
links_data <- gcam_data %>% 
  # filter(Source %in% c("Hydropower", "Solar")) %>% 
  select(Source, Target, value) %>% 
  # mutate(Target = ifelse(str_detect(Target, 'Ind'), 'Industry', Target)) %>% 
  group_by(Source, Target) %>% 
  summarize(value = sum(value)) %>% 
  ungroup() %>% 
  rename(Source_label = Source,
         Target_label = Target) %>% 
  left_join(node_mapping_e %>% select(label, node), by = c('Source_label' = 'label')) %>% 
  rename(Source_node = node) %>% 
  left_join(node_mapping_e %>% select(label, node), by = c('Target_label' = 'label')) %>% 
  rename(Target_node = node) %>% 
  left_join(node_mapping_e %>% select(label, stage, hex, color_name), by = c('Source_label' = 'label')) %>% 
  mutate(rgb = apply(FUN = paste, MARGIN = 2, X = col2rgb(hex), collapse = ',')) %>% 
  mutate(rgba = paste0('rgba(', rgb, ', ', link_alpha,')')) %>% 
  mutate(link_label = paste(Source_label, round(value, digits = 1),'EJ')) %>% 
  filter(value>0) %>% 
  arrange(Source_node)

datatable(links_data, filter = 'top', rownames = FALSE, options = list(pageLength = 10, scrollX = TRUE))
```


```{r fig.width=8, warning=FALSE}
# process node percent labels

# source
source_sum <- links_data %>% 
  select(Source_label, value) %>% 
  left_join(node_mapping_e %>% select(label, stage), by = c('Source_label' = 'label')) %>% 
  rename(label=Source_label) %>% 
  filter(tolower(stage) == 'source') %>% 
  group_by(label, stage) %>% 
  summarize(node_sum = sum(value))

source_total <- source_sum %>% 
  pull(node_sum) %>% sum

source_percent <- source_sum %>% 
  mutate(percent = node_sum/source_total*100) %>% 
  left_join(node_mapping_e) %>% 
  arrange(node) %>% 
  mutate(x = .01) %>% 
  mutate(csum_norm = source_total)
source_percent$csum <- cumsum(source_percent$node_sum)
source_percent$start <- lag(source_percent$csum)

# target
target_sum <- links_data %>% 
  select(Target_label, value) %>% 
  left_join(node_mapping_e %>% select(label, stage), by = c('Target_label' = 'label')) %>% 
  rename(label=Target_label) %>% 
  filter(stage == 'target') %>% 
  group_by(label, stage) %>% 
  summarize(node_sum = sum(value))

target_total <- target_sum %>% 
  pull(node_sum) %>% sum

target_percent <- target_sum %>% 
  mutate(percent = node_sum/target_total*100) %>% 
  left_join(node_mapping_e) %>% 
  arrange(node) %>% 
  mutate(x = .95) %>% 
  mutate(csum_norm = target_total)
target_percent$csum <- cumsum(target_percent$node_sum)
target_percent$start <- lag(target_percent$csum)

# Intermediate Carriers Flows in
intermediate_nodes <- node_mapping_e %>% filter(stage == 'mid') %>% pull(label)
 intermediate_flows_in_total <- links_data %>%
   filter(Target_label %in% intermediate_nodes) %>% 
   group_by(Target_label) %>% 
   summarize(node_sum = sum(value))
 
intermediate_percent <- intermediate_flows_in_total %>% 
 rename(label = Target_label) %>% 
 mutate(stage = 'mid') %>% 
 mutate(percent =node_sum/source_total*100) %>% 
 left_join(node_mapping_e)

intermediate_total <- intermediate_percent %>% pull(node_sum) %>% sum

intermediate_flows_out_total <- links_data %>%
 filter(Source_label %in% intermediate_nodes) %>% 
 group_by(Source_label) %>% 
 summarize(value = sum(value))
  
```


```{r fig.width=8, warning=FALSE}
# process node locations 

# final node info
nodes_data <- bind_rows(source_percent, intermediate_percent, target_percent) %>%
  arrange(node) %>%
  replace_na(list(start = 0)) %>% 
  mutate(mid_point = (start+csum)/2) %>% 
  mutate(y = mid_point/csum_norm) %>% 
  mutate(y = ifelse(label == 'Gas', 0.5,
                    ifelse(label == 'Liquid Fuels', 0.2,
                    ifelse(label == 'Electricity', 0.6,
                    ifelse(label == 'Hydrogen',0.9,y))))) %>% 
  mutate(x = ifelse(label == 'Gas', 0.25,
                    ifelse(label == 'Liquid Fuels', 0.4,
                           ifelse(label == 'Electricity', 0.6,
                                  ifelse(label == 'Hydrogen', 0.7,x))))) %>%
  mutate(node_label = ifelse(is.na(node_sum), label, 
                               paste0(label, ' ',round(node_sum, digits = 1) , gcam_data_unit, 
                                      ' ', round(percent, digits = 1),'%'))) 
  

# Check that Source and Targets in Links are in the node mapping

if( any(is.na(links_data$Source_node)) ) stop("Check Source number mapping - NA's")
if( any(is.na(links_data$Target_node)) ) stop("Check Target number mapping - NA's")
  
datatable(nodes_data, filter = 'top', rownames = FALSE, options = list(pageLength = 20, scrollX = TRUE))
```


```{r fig.width=8, warning=FALSE}
# save files for Kendall
names(Source_Target_all_losses) <- tolower(names(Source_Target_all_losses)) 
write_csv(Source_Target_all_losses %>% select(scenario, source, target, year, value, units), paste0("../", data_dir, 'allenergy_source_target.csv'))
# write_csv(nodes_data, paste0("../", data_dir, 'allenergy_nodes_data.csv'))
# write_csv(links_data, paste0("../", data_dir, 'allenergy_links_data.csv'))

```


```{r fig.width=8, warning=FALSE}
# plot sankey
sankey_figure <- plot_ly( 
      type = "sankey",
      # arrangement = "snap",
      domain = list(x =  c(0,1),y =  c(0,1)),
      orientation = "h",
      valueformat = ".0f",
      valuesuffix = gcam_data_unit,

# Nodes  
      node = list( label = nodes_data %>% pull(node_label),
                   color = nodes_data %>% pull(hex),
                   x = nodes_data %>% pull(x),
                   y = nodes_data %>% pull(y),
                   pad = 3,
                   thickness = 15,
                   line = list(color = "black",width = 0.5)),
  
# Links
      link = list(source = links_data$Source_node,
                  target = links_data$Target_node,
                  value =  links_data$value,
                  color =  links_data$rgba)
) 

# add Formatting
plot_title <- paste0('Energy - ', plot_scenario_name, ' - ',select_year)
sankey_figure <- sankey_figure %>% layout(
  title = plot_title,
  font = list(size = 11),
  xaxis = list(showgrid = F, zeroline = F),
  yaxis = list(showgrid = F, zeroline = F))

sankey_figure


```



```{r fig.width=8, warning=FALSE}


```






